*Purpose of programme: Build a consistent education variable and impute missing values

* Session setup: pin the interpreter version, start from an empty session,
* widen the output lines and switch off the --more-- pause.
version 14.2
clear all
set linesize 255
set more off


* Open clean datasets from data folder
* ***********************************
* For every year 1980-2000: load only the identifier (vsnr) and the
* education code (ausbild) from the yearly clean file, tag the rows with
* the calendar year, and store the slice as help<year>.dta in the working
* directory.
* Forward slashes are used in the path because Stata accepts them on every
* platform, whereas backslash separators only work on Windows.
forvalues y = 1980/2000 {
    use vsnr ausbild using "data/orig/clean`y'.dta", clear
    gen year = `y'
    save help`y', replace
}


* Stack the yearly slices into one long dataset (kept in memory and also
* written to help.dta).
use help1980, clear
forvalues y = 1981/2000 {
    append using help`y'
}
save help, replace


* Distribution of the raw education code: pooled, then separately by year
tabulate ausbild, missing
bysort year: tabulate ausbild, missing

* Recode every value above 6 to -1. Stata treats missing values as larger
* than any number, so missing codes are recoded to -1 as well.
replace ausbild = -1 if ausbild > 6 | missing(ausbild)

* Highest education code observed for each vsnr across all years
bysort vsnr: egen maxausbild = max(ausbild)
tabulate maxausbild, missing

* Three-level education category; everything not assigned to level 2 or 3
* (including individuals with only missing codes) is coded as low skill.
gen edu = 1
replace edu = 2 if inrange(maxausbild, 2, 4)
replace edu = 3 if inlist(maxausbild, 5, 6)

label define edu 1 "[1] None or only a school degree" 2 "[2] School and vocational" 3 "[3] Technical college / university"
label values edu edu
label variable edu "Categorical variable for educational attainment"

* Years of schooling implied by the highest education code
gen school = .
replace school = 10 if maxausbild <= 1
replace school = 13 if inlist(maxausbild, 2, 3)
replace school = 15 if maxausbild == 4
replace school = 18 if maxausbild == 5
replace school = 19 if maxausbild == 6
label variable school "Years of schooling, constructed"

* The helper variable is no longer needed
drop maxausbild

* Descriptive checks on the constructed variables:
* one-way frequencies, the education mix by year (row percentages),
* and summary statistics of schooling years within each category.
tabulate edu, missing
tabulate school, missing
tabulate year edu, row
tabulate edu, summarize(school)


* Keep one row per person: after sorting, retain only the first row of
* each vsnr (the row whose predecessor has a different vsnr).
sort vsnr year
keep if vsnr != vsnr[_n-1]

* Drop unnecessary variables
drop year ausbild

* Compress and save
* The output path uses forward slashes, which Stata accepts on every
* platform; backslash separators only work on Windows.
compress
sort vsnr
save "data/consistenteduc.dta", replace
	
* Erase the unnecessary intermediate files from the working directory:
* the yearly slices first, then the stacked file.
forvalues yr = 1980/2000 {
    erase help`yr'.dta
}
erase help.dta

exit
